{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "113402a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2a7d6113",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c02a3cfa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a,b,c,d,message\r\n",
      "1,2,3,4,hello\r\n",
      "5,6,7,8,world\r\n",
      "9,10,11,12,foo"
     ]
    }
   ],
   "source": [
    "!cat examples/ex1.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "96238f1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"examples/ex1.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7f703612",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f9a77107",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1,2,3,4,hello\r\n",
      "5,6,7,8,world\r\n",
      "9,10,11,12,foo"
     ]
    }
   ],
   "source": [
    "!cat examples/ex2.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1c8ba438",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>hello</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   1   2   3   4  hello\n",
       "0  5   6   7   8  world\n",
       "1  9  10  11  12    foo"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex2.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "f342fcfa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   0   1   2   3      4\n",
       "0  1   2   3   4  hello\n",
       "1  5   6   7   8  world\n",
       "2  9  10  11  12    foo"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex2.csv\", header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "871f1ea8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex2.csv\", names=[\"a\", \"b\", \"c\", \"d\", \"message\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5d371770",
   "metadata": {},
   "outputs": [],
   "source": [
    "names = [\"a\", \"b\", \"c\", \"d\", \"message\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9a32d60c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>message</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>hello</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>world</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>foo</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         a   b   c   d\n",
       "message               \n",
       "hello    1   2   3   4\n",
       "world    5   6   7   8\n",
       "foo      9  10  11  12"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex2.csv\", names=names, index_col=\"message\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c616ec3f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "key1,key2,value1,value2\r\n",
      "one,a,1,2\r\n",
      "one,b,3,4\r\n",
      "one,c,5,6\r\n",
      "one,d,7,8\r\n",
      "two,a,9,10\r\n",
      "two,b,11,12\r\n",
      "two,c,13,14\r\n",
      "two,d,15,16\r\n"
     ]
    }
   ],
   "source": [
    "!cat examples/csv_mindex.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9c9deb33",
   "metadata": {},
   "outputs": [],
   "source": [
    "parsed = pd.read_csv(\"examples/csv_mindex.csv\",\n",
    "                    index_col=[\"key1\", \"key2\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "82ccddee",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>value1</th>\n",
       "      <th>value2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">one</th>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">two</th>\n",
       "      <th>a</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>15</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           value1  value2\n",
       "key1 key2                \n",
       "one  a          1       2\n",
       "     b          3       4\n",
       "     c          5       6\n",
       "     d          7       8\n",
       "two  a          9      10\n",
       "     b         11      12\n",
       "     c         13      14\n",
       "     d         15      16"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c39ea943",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            A         B         C\r\n",
      "aaa -0.264438 -1.026059 -0.619500\r\n",
      "bbb  0.927272  0.302904 -0.032399\r\n",
      "ccc -0.264273 -0.386314 -0.217601\r\n",
      "ddd -0.871858 -0.348382  1.100491\r\n"
     ]
    }
   ],
   "source": [
    "!cat examples/ex3.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "f5777e9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.read_csv(\"examples/ex3.txt\", sep=\"\\s+\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "0f82fea9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>aaa</th>\n",
       "      <td>-0.264438</td>\n",
       "      <td>-1.026059</td>\n",
       "      <td>-0.619500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bbb</th>\n",
       "      <td>0.927272</td>\n",
       "      <td>0.302904</td>\n",
       "      <td>-0.032399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ccc</th>\n",
       "      <td>-0.264273</td>\n",
       "      <td>-0.386314</td>\n",
       "      <td>-0.217601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ddd</th>\n",
       "      <td>-0.871858</td>\n",
       "      <td>-0.348382</td>\n",
       "      <td>1.100491</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            A         B         C\n",
       "aaa -0.264438 -1.026059 -0.619500\n",
       "bbb  0.927272  0.302904 -0.032399\n",
       "ccc -0.264273 -0.386314 -0.217601\n",
       "ddd -0.871858 -0.348382  1.100491"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "97a27922",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# hey!\r\n",
      "a,b,c,d,message\r\n",
      "# just wanted to make things more difficult for you\r\n",
      "# who reads CSV files with computers, anyway?\r\n",
      "1,2,3,4,hello\r\n",
      "5,6,7,8,world\r\n",
      "9,10,11,12,foo\r\n"
     ]
    }
   ],
   "source": [
    "!cat examples/ex4.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a8b90ea0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex4.csv\", skiprows=[0,2,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "11cf3a45",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "something,a,b,c,d,message\r\n",
      "one,1,2,3,4,NA\r\n",
      "two,5,6,,8,world\r\n",
      "three,9,10,11,12,foo"
     ]
    }
   ],
   "source": [
    "!cat examples/ex5.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "64628f83",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.read_csv(\"examples/ex5.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "18d15435",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b     c   d message\n",
       "0       one  1   2   3.0   4     NaN\n",
       "1       two  5   6   NaN   8   world\n",
       "2     three  9  10  11.0  12     foo"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "b7c67dd8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something      a      b      c      d  message\n",
       "0      False  False  False  False  False     True\n",
       "1      False  False  False   True  False    False\n",
       "2      False  False  False  False  False    False"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.isna(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "09f98c31",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something     a     b      c     d  message\n",
       "0       True  True  True   True  True    False\n",
       "1       True  True  True  False  True     True\n",
       "2       True  True  True   True  True     True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.notna(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "98d1588b",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.read_csv(\"examples/ex5.csv\", na_values=\"NULL\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "89b36b3a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b     c   d message\n",
       "0       one  1   2   3.0   4     NaN\n",
       "1       two  5   6   NaN   8   world\n",
       "2     three  9  10  11.0  12     foo"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "1eb317e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "result2 = pd.read_csv(\"examples/ex5.csv\", keep_default_na=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "178d11c3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>NA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b   c   d message\n",
       "0       one  1   2   3   4      NA\n",
       "1       two  5   6       8   world\n",
       "2     three  9  10  11  12     foo"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "5b738808",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something      a      b      c      d  message\n",
       "0      False  False  False  False  False    False\n",
       "1      False  False  False  False  False    False\n",
       "2      False  False  False  False  False    False"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result2.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "1ad92dd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "result3 = pd.read_csv(\"examples/ex5.csv\", keep_default_na=False, na_values=\"NA\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "7a253503",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b   c   d message\n",
       "0       one  1   2   3   4     NaN\n",
       "1       two  5   6       8   world\n",
       "2     three  9  10  11  12     foo"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "0fb48fa3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   something      a      b      c      d  message\n",
       "0      False  False  False  False  False     True\n",
       "1      False  False  False   True  False    False\n",
       "2      False  False  False  False  False    False"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "c093c5c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "sentinels = {\"message\": [\"foo\", \"NA\"], \"something\": [\"two\"]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "2ebb0c4f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td></td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b   c   d message\n",
       "0       one  1   2   3   4     NaN\n",
       "1       NaN  5   6       8   world\n",
       "2     three  9  10  11  12     NaN"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex5.csv\", na_values=sentinels,\n",
    "            keep_default_na=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "9ea2910f",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.max_rows = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "fa3114eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.read_csv(\"examples/ex6.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "707393b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.467976</td>\n",
       "      <td>-0.038649</td>\n",
       "      <td>-0.295344</td>\n",
       "      <td>-1.824726</td>\n",
       "      <td>L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.358893</td>\n",
       "      <td>1.404453</td>\n",
       "      <td>0.704965</td>\n",
       "      <td>-0.200638</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.501840</td>\n",
       "      <td>0.659254</td>\n",
       "      <td>-0.421691</td>\n",
       "      <td>-0.057688</td>\n",
       "      <td>G</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.204886</td>\n",
       "      <td>1.074134</td>\n",
       "      <td>1.388361</td>\n",
       "      <td>-0.982404</td>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.354628</td>\n",
       "      <td>-0.133116</td>\n",
       "      <td>0.283763</td>\n",
       "      <td>-0.837063</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>2.311896</td>\n",
       "      <td>-0.417070</td>\n",
       "      <td>-1.409599</td>\n",
       "      <td>-0.515821</td>\n",
       "      <td>L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>-0.479893</td>\n",
       "      <td>-0.650419</td>\n",
       "      <td>0.745152</td>\n",
       "      <td>-0.646038</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>0.523331</td>\n",
       "      <td>0.787112</td>\n",
       "      <td>0.486066</td>\n",
       "      <td>1.093156</td>\n",
       "      <td>K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>-0.362559</td>\n",
       "      <td>0.598894</td>\n",
       "      <td>-1.843201</td>\n",
       "      <td>0.887292</td>\n",
       "      <td>G</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>-0.096376</td>\n",
       "      <td>-1.012999</td>\n",
       "      <td>-0.657431</td>\n",
       "      <td>-0.573315</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           one       two     three      four key\n",
       "0     0.467976 -0.038649 -0.295344 -1.824726   L\n",
       "1    -0.358893  1.404453  0.704965 -0.200638   B\n",
       "2    -0.501840  0.659254 -0.421691 -0.057688   G\n",
       "3     0.204886  1.074134  1.388361 -0.982404   R\n",
       "4     0.354628 -0.133116  0.283763 -0.837063   Q\n",
       "...        ...       ...       ...       ...  ..\n",
       "9995  2.311896 -0.417070 -1.409599 -0.515821   L\n",
       "9996 -0.479893 -0.650419  0.745152 -0.646038   E\n",
       "9997  0.523331  0.787112  0.486066  1.093156   K\n",
       "9998 -0.362559  0.598894 -1.843201  0.887292   G\n",
       "9999 -0.096376 -1.012999 -0.657431 -0.573315   0\n",
       "\n",
       "[10000 rows x 5 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "3099e65d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.467976</td>\n",
       "      <td>-0.038649</td>\n",
       "      <td>-0.295344</td>\n",
       "      <td>-1.824726</td>\n",
       "      <td>L</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.358893</td>\n",
       "      <td>1.404453</td>\n",
       "      <td>0.704965</td>\n",
       "      <td>-0.200638</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.501840</td>\n",
       "      <td>0.659254</td>\n",
       "      <td>-0.421691</td>\n",
       "      <td>-0.057688</td>\n",
       "      <td>G</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.204886</td>\n",
       "      <td>1.074134</td>\n",
       "      <td>1.388361</td>\n",
       "      <td>-0.982404</td>\n",
       "      <td>R</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.354628</td>\n",
       "      <td>-0.133116</td>\n",
       "      <td>0.283763</td>\n",
       "      <td>-0.837063</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.817480</td>\n",
       "      <td>0.742273</td>\n",
       "      <td>0.419395</td>\n",
       "      <td>-2.251035</td>\n",
       "      <td>Q</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        one       two     three      four key\n",
       "0  0.467976 -0.038649 -0.295344 -1.824726   L\n",
       "1 -0.358893  1.404453  0.704965 -0.200638   B\n",
       "2 -0.501840  0.659254 -0.421691 -0.057688   G\n",
       "3  0.204886  1.074134  1.388361 -0.982404   R\n",
       "4  0.354628 -0.133116  0.283763 -0.837063   Q\n",
       "5  1.817480  0.742273  0.419395 -2.251035   Q"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(\"examples/ex6.csv\", nrows=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "b00cf1e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "chunker = pd.read_csv(\"examples/ex6.csv\", chunksize=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "54818c10",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.io.parsers.readers.TextFileReader"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(chunker)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "1efc618b",
   "metadata": {},
   "outputs": [],
   "source": [
    "tot = pd.Series([], dtype=\"int64\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "5b39c922",
   "metadata": {},
   "outputs": [],
   "source": [
    "for piece in chunker:\n",
    "    tot = tot.add(piece[\"key\"].value_counts(), fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "e7235e2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "tot = tot.sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "3ba8dce7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "E    368.0\n",
       "X    364.0\n",
       "L    346.0\n",
       "O    343.0\n",
       "Q    340.0\n",
       "M    338.0\n",
       "J    337.0\n",
       "F    335.0\n",
       "K    334.0\n",
       "H    330.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tot[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "dc5a8174",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"examples/ex5.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "c7b88a9c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>something</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>two</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>three</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11.0</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  something  a   b     c   d message\n",
       "0       one  1   2   3.0   4     NaN\n",
       "1       two  5   6   NaN   8   world\n",
       "2     three  9  10  11.0  12     foo"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "6e44bffe",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv(\"examples/out.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "b156e851",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ",something,a,b,c,d,message\r\n",
      "0,one,1,2,3.0,4,\r\n",
      "1,two,5,6,,8,world\r\n",
      "2,three,9,10,11.0,12,foo\r\n"
     ]
    }
   ],
   "source": [
    "!cat examples/out.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "5f8aa18c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "32e1478b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "|something|a|b|c|d|message\n",
      "0|one|1|2|3.0|4|\n",
      "1|two|5|6||8|world\n",
      "2|three|9|10|11.0|12|foo\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout, sep=\"|\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "e0f3afc9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ",something,a,b,c,d,message\n",
      "0,one,1,2,3.0,4,NULL\n",
      "1,two,5,6,NULL,8,world\n",
      "2,three,9,10,11.0,12,foo\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout, na_rep=\"NULL\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "abbc25dd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one,1,2,3.0,4,\n",
      "two,5,6,,8,world\n",
      "three,9,10,11.0,12,foo\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout, index=False, header=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "4d2d32f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a,b,d\n",
      "1,2,4\n",
      "5,6,8\n",
      "9,10,12\n"
     ]
    }
   ],
   "source": [
    "data.to_csv(sys.stdout, index=False, columns=[\"a\", \"b\", \"d\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "d1d2a1b3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\"a\",\"b\",\"c\"\r\n",
      "\"1\",\"2\",\"3\"\r\n",
      "\"1\",\"2\",\"3\"\r\n"
     ]
    }
   ],
   "source": [
    "!cat examples/ex7.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "b636c7d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "7412a561",
   "metadata": {},
   "outputs": [],
   "source": [
    "f = open(\"examples/ex7.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "c89c60a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = csv.reader(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "46b3db85",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['a', 'b', 'c']\n",
      "['1', '2', '3']\n",
      "['1', '2', '3']\n"
     ]
    }
   ],
   "source": [
    "for line in reader:\n",
    "    print(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "b5170fd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "8575a17a",
   "metadata": {},
   "outputs": [],
   "source": [
    "obj = \"\"\"\n",
    "{\"name\": \"Wes\",\n",
    " \"cities_lived\": [\"Akron\", \"Nashville\", \"New York\", \"San Francisco\"],\n",
    " \"pet\": null,\n",
    " \"siblings\": [{\"name\": \"Scott\", \"age\": 34, \"hobbies\": [\"guitars\", \"soccer\"]},\n",
    "              {\"name\": \"Katie\", \"age\": 42, \"hobbies\": [\"diving\", \"art\"]}]\n",
    "}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "a3ab3aaf",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = json.loads(obj)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "347a925f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'name': 'Wes',\n",
       " 'cities_lived': ['Akron', 'Nashville', 'New York', 'San Francisco'],\n",
       " 'pet': None,\n",
       " 'siblings': [{'name': 'Scott', 'age': 34, 'hobbies': ['guitars', 'soccer']},\n",
       "  {'name': 'Katie', 'age': 42, 'hobbies': ['diving', 'art']}]}"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "ef095f94",
   "metadata": {},
   "outputs": [],
   "source": [
    "asjson = json.dumps(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "e728e71f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"name\": \"Wes\", \"cities_lived\": [\"Akron\", \"Nashville\", \"New York\", \"San Francisco\"], \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\": 34, \"hobbies\": [\"guitars\", \"soccer\"]}, {\"name\": \"Katie\", \"age\": 42, \"hobbies\": [\"diving\", \"art\"]}]}'"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "asjson"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "2a4dc83d",
   "metadata": {},
   "outputs": [],
   "source": [
    "siblings = pd.DataFrame(result[\"siblings\"], columns=[\"name\", \"age\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "b4c8492b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Scott</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Katie</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    name  age\n",
       "0  Scott   34\n",
       "1  Katie   42"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "siblings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "bfb1c10d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{\"a\": 1, \"b\": 2, \"c\": 3},\r\n",
      " {\"a\": 4, \"b\": 5, \"c\": 6},\r\n",
      " {\"a\": 7, \"b\": 8, \"c\": 9}]\r\n"
     ]
    }
   ],
   "source": [
    "!cat examples/example.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "19c3b5b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_json(\"examples/example.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "d287e398",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a  b  c\n",
       "0  1  2  3\n",
       "1  4  5  6\n",
       "2  7  8  9"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "38cd51f0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"a\":{\"0\":1,\"1\":4,\"2\":7},\"b\":{\"0\":2,\"1\":5,\"2\":8},\"c\":{\"0\":3,\"1\":6,\"2\":9}}"
     ]
    }
   ],
   "source": [
    "data.to_json(sys.stdout)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "6a3c1d72",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{\"a\":1,\"b\":2,\"c\":3},{\"a\":4,\"b\":5,\"c\":6},{\"a\":7,\"b\":8,\"c\":9}]"
     ]
    }
   ],
   "source": [
    "data.to_json(sys.stdout, orient=\"records\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "34ea84ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "tables = pd.read_html(\"examples/fdic_failed_bank_list.html\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "6e77820a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(tables)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "d66d9fe3",
   "metadata": {},
   "outputs": [],
   "source": [
    "failures = tables[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "62132a0f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Bank Name</th>\n",
       "      <th>City</th>\n",
       "      <th>ST</th>\n",
       "      <th>CERT</th>\n",
       "      <th>Acquiring Institution</th>\n",
       "      <th>Closing Date</th>\n",
       "      <th>Updated Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Allied Bank</td>\n",
       "      <td>Mulberry</td>\n",
       "      <td>AR</td>\n",
       "      <td>91</td>\n",
       "      <td>Today's Bank</td>\n",
       "      <td>September 23, 2016</td>\n",
       "      <td>November 17, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The Woodbury Banking Company</td>\n",
       "      <td>Woodbury</td>\n",
       "      <td>GA</td>\n",
       "      <td>11297</td>\n",
       "      <td>United Bank</td>\n",
       "      <td>August 19, 2016</td>\n",
       "      <td>November 17, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>First CornerStone Bank</td>\n",
       "      <td>King of Prussia</td>\n",
       "      <td>PA</td>\n",
       "      <td>35312</td>\n",
       "      <td>First-Citizens Bank &amp; Trust Company</td>\n",
       "      <td>May 6, 2016</td>\n",
       "      <td>September 6, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Trust Company Bank</td>\n",
       "      <td>Memphis</td>\n",
       "      <td>TN</td>\n",
       "      <td>9956</td>\n",
       "      <td>The Bank of Fayette County</td>\n",
       "      <td>April 29, 2016</td>\n",
       "      <td>September 6, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>North Milwaukee State Bank</td>\n",
       "      <td>Milwaukee</td>\n",
       "      <td>WI</td>\n",
       "      <td>20364</td>\n",
       "      <td>First-Citizens Bank &amp; Trust Company</td>\n",
       "      <td>March 11, 2016</td>\n",
       "      <td>June 16, 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>542</th>\n",
       "      <td>Superior Bank, FSB</td>\n",
       "      <td>Hinsdale</td>\n",
       "      <td>IL</td>\n",
       "      <td>32646</td>\n",
       "      <td>Superior Federal, FSB</td>\n",
       "      <td>July 27, 2001</td>\n",
       "      <td>August 19, 2014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>543</th>\n",
       "      <td>Malta National Bank</td>\n",
       "      <td>Malta</td>\n",
       "      <td>OH</td>\n",
       "      <td>6629</td>\n",
       "      <td>North Valley Bank</td>\n",
       "      <td>May 3, 2001</td>\n",
       "      <td>November 18, 2002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>544</th>\n",
       "      <td>First Alliance Bank &amp; Trust Co.</td>\n",
       "      <td>Manchester</td>\n",
       "      <td>NH</td>\n",
       "      <td>34264</td>\n",
       "      <td>Southern New Hampshire Bank &amp; Trust</td>\n",
       "      <td>February 2, 2001</td>\n",
       "      <td>February 18, 2003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>545</th>\n",
       "      <td>National State Bank of Metropolis</td>\n",
       "      <td>Metropolis</td>\n",
       "      <td>IL</td>\n",
       "      <td>3815</td>\n",
       "      <td>Banterra Bank of Marion</td>\n",
       "      <td>December 14, 2000</td>\n",
       "      <td>March 17, 2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>546</th>\n",
       "      <td>Bank of Honolulu</td>\n",
       "      <td>Honolulu</td>\n",
       "      <td>HI</td>\n",
       "      <td>21029</td>\n",
       "      <td>Bank of the Orient</td>\n",
       "      <td>October 13, 2000</td>\n",
       "      <td>March 17, 2005</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>547 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             Bank Name             City  ST   CERT  \\\n",
       "0                          Allied Bank         Mulberry  AR     91   \n",
       "1         The Woodbury Banking Company         Woodbury  GA  11297   \n",
       "2               First CornerStone Bank  King of Prussia  PA  35312   \n",
       "3                   Trust Company Bank          Memphis  TN   9956   \n",
       "4           North Milwaukee State Bank        Milwaukee  WI  20364   \n",
       "..                                 ...              ...  ..    ...   \n",
       "542                 Superior Bank, FSB         Hinsdale  IL  32646   \n",
       "543                Malta National Bank            Malta  OH   6629   \n",
       "544    First Alliance Bank & Trust Co.       Manchester  NH  34264   \n",
       "545  National State Bank of Metropolis       Metropolis  IL   3815   \n",
       "546                   Bank of Honolulu         Honolulu  HI  21029   \n",
       "\n",
       "                   Acquiring Institution        Closing Date  \\\n",
       "0                           Today's Bank  September 23, 2016   \n",
       "1                            United Bank     August 19, 2016   \n",
       "2    First-Citizens Bank & Trust Company         May 6, 2016   \n",
       "3             The Bank of Fayette County      April 29, 2016   \n",
       "4    First-Citizens Bank & Trust Company      March 11, 2016   \n",
       "..                                   ...                 ...   \n",
       "542                Superior Federal, FSB       July 27, 2001   \n",
       "543                    North Valley Bank         May 3, 2001   \n",
       "544  Southern New Hampshire Bank & Trust    February 2, 2001   \n",
       "545              Banterra Bank of Marion   December 14, 2000   \n",
       "546                   Bank of the Orient    October 13, 2000   \n",
       "\n",
       "          Updated Date  \n",
       "0    November 17, 2016  \n",
       "1    November 17, 2016  \n",
       "2    September 6, 2016  \n",
       "3    September 6, 2016  \n",
       "4        June 16, 2016  \n",
       "..                 ...  \n",
       "542    August 19, 2014  \n",
       "543  November 18, 2002  \n",
       "544  February 18, 2003  \n",
       "545     March 17, 2005  \n",
       "546     March 17, 2005  \n",
       "\n",
       "[547 rows x 7 columns]"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "failures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "8af4f180",
   "metadata": {},
   "outputs": [],
   "source": [
    "close_timestamp = pd.to_datetime(failures[\"Closing Date\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "fff83f76",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2010    157\n",
       "2009    140\n",
       "2011     92\n",
       "2012     51\n",
       "2008     25\n",
       "       ... \n",
       "2004      4\n",
       "2001      4\n",
       "2007      3\n",
       "2003      3\n",
       "2000      2\n",
       "Name: Closing Date, Length: 15, dtype: int64"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "close_timestamp.dt.year.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "66e26de6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from lxml import objectify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "675a75f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.read_csv(\"examples/ex1.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "8d97f5b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "26d213fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_pickle(\"examples/frame_pickle\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "3a308bc9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_pickle(\"examples/frame_pickle\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "b8822eaa",
   "metadata": {},
   "outputs": [],
   "source": [
    "xlsx = pd.ExcelFile(\"examples/ex1.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "f61a59fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Sheet1']"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlsx.sheet_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "d75f8c79",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  a   b   c   d message\n",
       "0           0  1   2   3   4   hello\n",
       "1           1  5   6   7   8   world\n",
       "2           2  9  10  11  12     foo"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlsx.parse(sheet_name=\"Sheet1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "abc9d715",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>message</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>hello</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>world</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>foo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   a   b   c   d message\n",
       "0  1   2   3   4   hello\n",
       "1  5   6   7   8   world\n",
       "2  9  10  11  12     foo"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xlsx.parse(sheet_name=\"Sheet1\", index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "19871119",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.read_excel(\"examples/ex1.xlsx\", sheet_name=\"Sheet1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "c20f4e57",
   "metadata": {},
   "outputs": [],
   "source": [
    "writer = pd.ExcelWriter(\"examples/ex2.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "e2b67870",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_excel(writer, \"Sheet1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "7fbdbaa6",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_excel(\"examples/ex2.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "8cf61227",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame = pd.DataFrame({\"a\": np.random.standard_normal(100)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "9dbb5b35",
   "metadata": {},
   "outputs": [],
   "source": [
    "store = pd.HDFStore(\"examples/mydata.h5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "60a860da",
   "metadata": {},
   "outputs": [],
   "source": [
    "store[\"obj\"] = frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "363f10b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "store[\"obj1_col\"] = frame[\"a\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "9e201567",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<class 'pandas.io.pytables.HDFStore'>\n",
       "File path: examples/mydata.h5"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "eb103830",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.971905</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.386929</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.232824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.696200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.340607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>-1.766186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>-2.064353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>1.545288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>-2.174280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>-0.992945</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           a\n",
       "0  -0.971905\n",
       "1   0.386929\n",
       "2   0.232824\n",
       "3   1.696200\n",
       "4  -0.340607\n",
       "..       ...\n",
       "95 -1.766186\n",
       "96 -2.064353\n",
       "97  1.545288\n",
       "98 -2.174280\n",
       "99 -0.992945\n",
       "\n",
       "[100 rows x 1 columns]"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store[\"obj\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "7a791fe2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    -0.971905\n",
       "1     0.386929\n",
       "2     0.232824\n",
       "3     1.696200\n",
       "4    -0.340607\n",
       "        ...   \n",
       "95   -1.766186\n",
       "96   -2.064353\n",
       "97    1.545288\n",
       "98   -2.174280\n",
       "99   -0.992945\n",
       "Name: a, Length: 100, dtype: float64"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store[\"obj1_col\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "0ef8210e",
   "metadata": {},
   "outputs": [],
   "source": [
    "frame.to_hdf(\"examples/mydata.h5\", \"obj3\", format=\"table\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "59f9857a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "e07ff72b",
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://api.github.com/repos/pandas-dev/pandas/issues\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "4f903b08",
   "metadata": {},
   "outputs": [],
   "source": [
    "resp = requests.get(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "f2b1182a",
   "metadata": {},
   "outputs": [],
   "source": [
    "resp.raise_for_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "b745f667",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Response [200]>"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "1c08556e",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = resp.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "ea9aaa63",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CLN: simplify groupby wrapping'"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0][\"title\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "f39b4730",
   "metadata": {},
   "outputs": [],
   "source": [
    "issues = pd.DataFrame(data, columns=[\"number\", \"title\", \"labels\", \"state\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "73bc9648",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>number</th>\n",
       "      <th>title</th>\n",
       "      <th>labels</th>\n",
       "      <th>state</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>51259</td>\n",
       "      <td>CLN: simplify groupby wrapping</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>51258</td>\n",
       "      <td>BUG: IntervalArray.shift with invalid NA fill_...</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>51257</td>\n",
       "      <td>TST: clean up some groupby xfails</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>51256</td>\n",
       "      <td>BUG: After running `to_timestamp` on `Series` ...</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>51255</td>\n",
       "      <td>BUG: Groupby.filter with pd.NA</td>\n",
       "      <td>[]</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>51229</td>\n",
       "      <td>DOC: add/fix Timedeltas docstrings</td>\n",
       "      <td>[{'id': 134699, 'node_id': 'MDU6TGFiZWwxMzQ2OT...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>51227</td>\n",
       "      <td>PERF: ArrowExtensionArray.to_numpy(dtype=object)</td>\n",
       "      <td>[{'id': 8935311, 'node_id': 'MDU6TGFiZWw4OTM1M...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>51225</td>\n",
       "      <td>BUG: dtype=\"int64[pyarrow]foobar[pyarrow]\" fai...</td>\n",
       "      <td>[{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>51223</td>\n",
       "      <td>ENH: add unit, as_unit to dt accessor</td>\n",
       "      <td>[{'id': 3713792788, 'node_id': 'LA_kwDOAA0YD87...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>51219</td>\n",
       "      <td>CI/TST: Mark test_basic_series_frame_alignment...</td>\n",
       "      <td>[{'id': 48070600, 'node_id': 'MDU6TGFiZWw0ODA3...</td>\n",
       "      <td>open</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    number                                              title  \\\n",
       "0    51259                     CLN: simplify groupby wrapping   \n",
       "1    51258  BUG: IntervalArray.shift with invalid NA fill_...   \n",
       "2    51257                  TST: clean up some groupby xfails   \n",
       "3    51256  BUG: After running `to_timestamp` on `Series` ...   \n",
       "4    51255                     BUG: Groupby.filter with pd.NA   \n",
       "..     ...                                                ...   \n",
       "25   51229                 DOC: add/fix Timedeltas docstrings   \n",
       "26   51227   PERF: ArrowExtensionArray.to_numpy(dtype=object)   \n",
       "27   51225  BUG: dtype=\"int64[pyarrow]foobar[pyarrow]\" fai...   \n",
       "28   51223              ENH: add unit, as_unit to dt accessor   \n",
       "29   51219  CI/TST: Mark test_basic_series_frame_alignment...   \n",
       "\n",
       "                                               labels state  \n",
       "0                                                  []  open  \n",
       "1                                                  []  open  \n",
       "2                                                  []  open  \n",
       "3   [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "4                                                  []  open  \n",
       "..                                                ...   ...  \n",
       "25  [{'id': 134699, 'node_id': 'MDU6TGFiZWwxMzQ2OT...  open  \n",
       "26  [{'id': 8935311, 'node_id': 'MDU6TGFiZWw4OTM1M...  open  \n",
       "27  [{'id': 76811, 'node_id': 'MDU6TGFiZWw3NjgxMQ=...  open  \n",
       "28  [{'id': 3713792788, 'node_id': 'LA_kwDOAA0YD87...  open  \n",
       "29  [{'id': 48070600, 'node_id': 'MDU6TGFiZWw0ODA3...  open  \n",
       "\n",
       "[30 rows x 4 columns]"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "issues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "800108d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "16dd1856",
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "        CREATE TABLE test\n",
    "        (a VARCHAR(20), b VARCHAR(20),\n",
    "        c REAL,        d INTEGER\n",
    "        );\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2be5b805",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
